In [1]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
# IF you don't have plotly installed then comment out the following lines. 
#    I am using plotly below to make interactive plots which are nice... but not
#    absolutely necessary
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

Example Data

Lets first look at some numbers drawn randomly from a normal distribution (the classic bell curve distribution). In the first line of the following code I'm setting the random number seed so that you can get the exact same response I did. IF you want to change slightly every time you run this, then comment out that line

In [2]:
np.random.seed(69875)
mu, sigma = 15, 3 # mean and standard deviation
normal_dist = np.random.normal(loc=mu, scale=sigma, size=200)

At their heart, both the histogram and the CDF (Cumulative Distribution Function) are displaying similar information, but in different ways. A histogram can be thought of as an empirical estimation of the Probability Density Function (PDF) and represents the probability with areas. Technically the PDF would represent this with an "area under the curve". Histograms are bar plots... so I guess we can say they represent this with the area of a bar! The CDF represents probability with vertical distances and is cumulative.

Histogram

First we'll look at the histogram. Below are two functions for plotting histograms. They are larger functions than you need for just a basic histogram, but that's because I like to add some annotations to my plots. The second function uses plotly to create an interactive histogram so you can hover over each bin and have the relavent info pop up.

In [3]:
def plot_histogram(data, bins=15, ax=None, hist_color='green', figsize=(5, 5), estimate_pdf=True,
                   pdf_color='purple', stat_box_loc=(0.05, 0.95), annotate_bin_idx=False,
                   annotate_color='blue', show_mean=True, show_median=True,
                   distribution_name="Normal", return_fig=True, density=True):
    '''
    Plot up the histogram with maybe some annotations. You can set the bin idx and it will highlight that bin
    and plot up the probability value for that bin. Uses matplotlib histogram with density=True
    
    Parameters:
        data: np.array
            the distribution you want to plot up
        bins:(int or 'auto')
            the number of bins to use when binning your data
        ax:matplotlib.axes
            a matplotlib axes instance if you are creating that beforehand. Otherwise I'll create one
        hist_color: str
            any matplotlib recognized color
        figsize: tuple
            (width, height)
        estimate_pdf:bool
            Whether to plot up the estimated pdf
        pdf_color: str
            any matplotlib recognized color
        stat_box_loc: tuple
            (x, y) coordinate in figure space not data space (0, 0) is bottom left of figure
        annotate_bin_idx: int or list of ints
            Will highlight a bin(s) and plot the probability
        annotate_color: str
            any matplotlib recognized color
        show_mean: bool
            plot a vertical line on the mean
        show_median: bool
            plot a vertical line on the median
        distribution_name: str
            what name to put in title
        density: bool
            converts the y-axis to probabilities instead of frequency count
    Returns:
        fig, ax
        ax        
    '''
    mean = data.mean()
    median = np.median(data)
    standard_deviation = data.std()
    textstr = '\n'.join((rf'$\mu={mean:.1f}$',
                         rf'$median={median:.1f}$',
                         rf'$\sigma={standard_deviation:.1f}$')
                       )
    data_min, data_max = data.min(), data.max() 
    data_delta = data_max - data_min
    data_pad = data_delta * 0.03
    x_limits = (data_min- data_pad, data_max + data_pad)
    
    
    if not ax:
        return_fig = True
        fig, ax = plt.subplots(figsize=figsize)
    else:
        return_fig = False
    # Plot Histogram
    n, bins, patches = ax.hist(x=data, bins=bins, color=hist_color,
                               density=density, alpha=0.7, rwidth=0.9, label='Histogram')
    bin_centers = 0.5*(bins[1:] + bins[:-1])
    # Plot PDF
    if estimate_pdf:
        pdf = stats.norm.pdf(bin_centers, mean, standard_deviation)
        ax.plot(bin_centers, pdf,
                linewidth=2, color=pdf_color, label='PDF')
    # plot distribution Stats
    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
    ax.text(stat_box_loc[0], stat_box_loc[1], textstr, transform=ax.transAxes, fontsize=10,
            verticalalignment='top', bbox=props)
    # Plot mean and median
    prob_max = np.max(n)
    y_limits = (0, prob_max * 1.05)
    if show_mean:
        ax.plot([mean, mean], [0, y_limits[1]], color='orange', ls='--', label='mean')
    if show_median:
        ax.plot([median, median], [0, y_limits[1]], color='brown', ls='--', label='median')
    
    # highlight example bin
    if annotate_bin_idx:
        ax.plot([x_limits[0], bin_centers[annotate_bin_idx]], [n[annotate_bin_idx], n[annotate_bin_idx]],
                 color=annotate_color, ls='--')
        text_loc_x = x_limits[0] + (bin_centers[annotate_bin_idx] - data_min) * 0.6
        tex_loc_y = n[annotate_bin_idx] + 0.005
        ax.text(text_loc_x, tex_loc_y, '{0:.1%}'.format(n[annotate_bin_idx]),
                fontsize=12, color=annotate_color)

        highlited_bin = ax.add_patch(patches[annotate_bin_idx])
        highlited_bin.set_edgecolor(annotate_color)
        highlited_bin.set_linewidth(3)
    
    # Format Plot
    ax.set_xticks(bins)
    ax.tick_params(axis='x', labelrotation=90)
    ax.set_xlim(x_limits)
    ax.set_ylim(y_limits)
    if density:
        ax.set_ylabel('probability')
    else:
        ax.set_ylabel('frequency count')
    ax.set_xlabel('bin ranges')
    ax.set_title(f'Histogram of a {distribution_name} Distribution')
    ax.legend()
    
    if return_fig:
        return fig, ax
    return ax

def plotly_histogram(data, bins=15, hist_color='green', figsize=(500, 500), estimate_pdf=True,
                     pdf_color='purple', stat_box_loc=(0.02, 0.98), 
                     show_mean=True, show_median=True, distribution_name="Normal"):
    '''
    Create interactive histogram plots with plotly. Add options for some extra annotations. 
    np.histogram with density=True
    
    Parameters:
        data: np.array
            the distribution you want to plot up
        bins:(int or 'auto')
            the number of bins to use when binning your data
        hist_color: str
            any plotly recognized color string
        figsize: tuple
            (width, height) in pixels
        estimate_pdf:bool
            Whether to plot up the estimated pdf
        pdf_color: str
            any plotly recognized color string
        stat_box_loc: tuple
            (x, y) coordinate in figure space not data space (0, 0) is bottom left of figure
        show_mean: bool
            plot a vertical line on the mean
        show_median: bool
            plot a vertical line on the median
        distribution_name: str
            what name to put in title
    
    Returns:
        fig: go.Figure()
    '''
    # Get Stats on Distribution
    mean = data.mean()
    median = np.median(data)
    standard_deviation = data.std()
    stat_strings = '<br>'.join([f'Mean = {mean:.1f}'.format(mean), 
                                f'Median = {median:.1f}',
                                f'S.D. = {standard_deviation:.1f}']
                               )
    
    # Use numpy to create the histogram and PDF
    n, bins = np.histogram(data, bins=bins, density=True)
    bin_centers = 0.5*(bins[1:] + bins[:-1])
    str_bins = ['{0:.2f}'.format(x) for x in bins]
    
    if estimate_pdf:
        pdf = stats.norm.pdf(bin_centers, mean, standard_deviation)
    
    # Set up the hover template for plotly
    # hover_template = '<i>Probability: %{y:.1%}' + '<br>value: %{x:.2f}'
    hover_template = '<i>Probability: %{y:.1%}'
    
    # Create figure
    fig = go.Figure(data=[go.Bar(x=bin_centers, y=n, marker_color=hist_color,
                                 hovertemplate=hover_template, name="Histogram")],
                    layout=go.Layout(bargap=0.05))
    if estimate_pdf:
        fig.add_trace(go.Scatter(x=bin_centers, y=pdf, line={'color':pdf_color},
                                 hovertemplate=hover_template, name="PDF"))
    # Add mean and median lines
    if show_mean:
        fig.add_trace(go.Scatter(x=[mean, mean], y=[0, n.max()], 
                                 line={'color':'orange', 'dash':'dash'},
                                 mode='lines', name='Mean', hoverinfo='skip'))
    if show_median:
        fig.add_trace(go.Scatter(x=[median, median], y=[0, n.max()], 
                                 line={'color':'brown', 'dash':'dash'},
                                 mode='lines', name='Median', hoverinfo='skip'))
    # add stats annotation
    fig.add_annotation(x=stat_box_loc[0], y=stat_box_loc[1], text=stat_strings,
                       bordercolor='black', bgcolor='tan', showarrow=False,
                       xref='paper', yref='paper',
                       align='left', font=dict(size=10))
    # set up fig layout
    fig.update_layout(title={'text':f'Histogram of a {distribution_name} Distribution',
                             'x':0.5, 'xref':'paper', 'y':0.85}, 
                      width=figsize[0], height=figsize[1],
                      xaxis=dict(title='Bin Ranges', tickmode='array',
                                 tickvals=bins, ticktext=str_bins, tickangle=-90),
                      yaxis=dict(title='Probability'))
    return fig

Histograms are one commonly used graphical representation of a distribution of numbers. They are typically plotted in a bar graph style plot where the height of each bar shows the frequency of a bin and represents the probability that a number will fall within that bin. This is in essence a slice of the area under a curve. The width of the bar represents the bin size used to group the numbers (or the width of the slice). The total width of all bars shows the range of values in the distribution. Here's an example showing the histogram and the estimated PDF for my normal distribution:

In [5]:
fig, ax = plt.subplots(figsize=(5,5))
n, bins, patches = ax.hist(x=normal_dist, bins=15,
                           density=True, alpha=0.7, rwidth=0.9, label='Histogram')
In [10]:
n
Out[10]:
array([0.00935439, 0.01403158, 0.02806316, 0.06080351, 0.07483509,
       0.09354386, 0.12628422, 0.08886667, 0.1356386 , 0.09354386,
       0.08886667, 0.06080351, 0.03274035, 0.01870877, 0.00935439])
In [7]:
n.sum()
Out[7]:
0.935438642426952
In [9]:
bin_centers = 0.5*(bins[1:] + bins[:-1])
np.trapz(n, bin_centers)
Out[9]:
0.9899999999999998
In [11]:
fig, ax = plt.subplots(figsize=(5,5))
n, bins, patches = ax.hist(x=normal_dist, bins=15,
                           density=False, alpha=0.7, rwidth=0.9, label='Histogram')
In [12]:
n
Out[12]:
array([ 2.,  3.,  6., 13., 16., 20., 27., 19., 29., 20., 19., 13.,  7.,
        4.,  2.])
In [20]:
fig, ax = plt.subplots(figsize=(5,5))
weights = np.ones_like(normal_dist)/len(normal_dist)
n, bins, patches = ax.hist(x=normal_dist, bins=15, weights=weights,
                           density=False, alpha=0.7, rwidth=0.9, label='Histogram')
In [21]:
n
Out[21]:
array([0.01 , 0.015, 0.03 , 0.065, 0.08 , 0.1  , 0.135, 0.095, 0.145,
       0.1  , 0.095, 0.065, 0.035, 0.02 , 0.01 ])
In [22]:
n.sum()
Out[22]:
1.0
In [24]:
heights,bins = np.histogram(normal_dist,bins=15)
heights = heights/sum(heights)
fig, ax = plt.subplots(figsize=(5,5))
ax.bar(bins[:-1],heights,width=(max(bins) - min(bins))/len(bins), color="blue", alpha=0.5)
Out[24]:
<BarContainer object of 15 artists>
In [ ]:
fig, ax = plot_histogram(normal_dist)
fig.savefig('images/normal_histogram_pdf.png', dpi=150, bbox_inches='tight')

I prefer to always plot histograms in density=True mode. It just makes more sense to me. If you don't use the density mode then it plots frequency, which is I fend less usefull since I am usually thinking more in terms of probabilities. The Frequency doesn't really mean anything without also knowing the how many samples are in your distribution. Here's an example of the density versus frequency plot

In [ ]:
fig, ax = plt.subplots(ncols=2, figsize=(10, 5))
ax[0] = plot_histogram(normal_dist, ax=ax[0], estimate_pdf=False)
ax[1] = plot_histogram(normal_dist, ax=ax[1], density=False, estimate_pdf=False)
fig.savefig('images/normal_histogram_density_compare.png', dpi=150, bbox_inches='tight')

Ok so you probably already knew what a histogram was, and you might already know how to read a histogram, but to make sure we are on the same page lets look at what the histogram shows us. We'll take the same normal distribution and plot it's histogram, but this time in an interactive plot so you can look at some of the values yourself. If you don't have plotly, then there are some static plots further down.

In [ ]:
fig = plotly_histogram(normal_dist)
fig.show()
fig.write_html('images/2020-04-normal_histogram_pdf.html')

First there are a some general pieces of information we can see from the plot without plotting any annotation. These are the range of values, probability of a value within a bin, the general shape of the distribution, the skewness of the distribution, and possible outliers. So lets look at those with the above plot:

| Range of values | 6.39 to 22.42 |

| Probability of value in 4th bin | ~6% |

| Shape of the distribution | Typical normal "bell curve" |

| Skewness | None |

| Outliers | None |

In the above table, we did say there wasn't any skewness. How do we know that? Well some of it is based on our own loosy-goosy feeling of what the shape looks like. The other way we can get a quick sense is to also plot the mean and median lines and see if they "look" close together. Which brings up another interesting point. You can't really tell what the mean or median of your distribution is without plotting them. It's a non-trivial probablem to add up in our heads the probabilities of all the different bins and figure out which bin lies in the middle. Here it might look easy because we are dealing with a normal distribution. In Part 2, we'll look at a bunch of different distributions, such as the log normal distribution, and you can see how hard it is to actually tell intuitively where your median value is.

In [ ]:
fig, ax = plot_histogram(normal_dist, estimate_pdf=False, show_median=True,
                         show_mean=False, annotate_bin_idx=3)
fig.savefig('images/normal_histogram_annotated.png', dpi=150, bbox_inches='tight')

CDF

Now we'll look at the cumulative distribution function (aka. CDF) which is another graphical representation of the distribution of numbers (discrete, or continuous). Below are a couple of usefull functions. First is a quick function to create the x, y values for the cdf plot and the second two are fancy plots functions for the cdf. Comment out the plotly one if you aren't using plotly

In [ ]:
def ecdf(data):
    '''
    Take an array of your distribution values and return
    an emperical cdf
    
    Paramaters:
        data:np.array or list
        
    Returns:
        sorted_array, yidx_array
    '''
    if isinstance(data, list): data = np.array(data)
    
    # sort unique data
    x_values = np.sort(data)
    # calculate cumulative probability
    d_size = len(x_values)
    y_values = np.arange(1, d_size + 1) / d_size 

    return x_values, y_values

def plot_ecdf(data, cdf_color='green', ax=None, figsize=(5, 5), stat_box_loc=(0.03, 0.75),
              percentiles_to_annotate=0.5, annotate_colors='#c51b8a',
              distribution_name='Normal', return_fig=False):
    '''
    Given a distribution plot the cdf and stats for the cdf. You can also pass it some percentiles and it will
    annotate those with lines and values on the plot
    
    Parameters:
        data: np.array
            the distribution you want to plot up
        cdf_color: str
            any matplotlib recognized color
        ax:matplotlib.axes
            a matplotlib axes instance if you are creating that beforehand. Otherwise I'll create one
        figsize: tuple
            (width, height)
        stat_box_loc: tuple
            (x, y) coordinate in figure space not data space (0, 0) is bottom left of figure
        percentiles_to_annotate: (float, [floats] or False)
            if you want to plot some lines and annotations for specific but in the decimal
            form here. i.e. [0.2, 0.8] for the P20 and the P80
        annotate_color: str
            any matplotlib recognized color. Must be same number of colors as percentiles_to_annotate
            unless False was passed to the percentiles
        distribution_name: str
            what name to put in title
        return_fig: bool
            to return fig or not. will always return at least ax
            
    Returns:
        fig, ax
        ax
    '''
    x, y = ecdf(data)
    median = np.median(normal_dist)
    standard_deviation = normal_dist.std()
    textstr = '\n'.join((f'median={median:.1f}',
                     rf'$\sigma={standard_deviation:.1f}$')
                       )
    data_min = data.min()
    data_max = data.max()
    data_delta = data_max - data_min
    data_pad = data_delta * 0.03
    x_limits = (data_min- data_pad, data_max + data_pad)
    if not ax:
        return_fig = True
        fig, ax = plt.subplots(figsize=figsize)
    else:
        return_fig = False
    ax.plot(x, y, color=cdf_color, label='CDF')
    
    # plot distribution Stats
    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
    ax.text(stat_box_loc[0], stat_box_loc[1], textstr, transform=ax.transAxes, fontsize=10,
            verticalalignment='top', bbox=props)
    
    # Annotate the Percentiles if Passed
    if percentiles_to_annotate:
        if isinstance(percentiles_to_annotate, float):
            percentiles_to_annotate =[ percentiles_to_annotate]
        if isinstance(annotate_colors, str):
            annotate_colors = [annotate_colors]
        for idx, perc in enumerate(percentiles_to_annotate):
            idx_p = np.where(y == perc)[0][0] # Could change this to something fancy values don't line up
            perc_value = x[idx_p]
            perc_label = "{0}".format(perc * 100)
            ax.plot([perc_value, perc_value], [0, perc], color=annotate_colors[idx],
                    ls='--', label=f'P{perc_label}')
            ax.plot([x_limits[0], perc_value], [perc , perc],
                    color=annotate_colors[idx], ls='--')
            # Plot Percentile Text
            text_loc_horz = x_limits[0] + (perc_value - data_min) * 0.6
            text_loc_vert = perc + 0.01
            ax.text(text_loc_horz, text_loc_vert, '{0} %'.format(perc_label),
                    fontsize=12, color=annotate_colors[idx])
            # Plot Value Text
            text_loc_horz = perc_value + 0.1
            text_loc_vert = perc * 0.7
            ax.text(text_loc_horz, text_loc_vert, f'{perc_value:.1f}',
                    fontsize=12, color=annotate_colors[idx])
        # Format Plot
        ax.set_xlim(x_limits)
        ax.set_ylim(0, 1)
        #ax.tick_params(axis='x', labelrotation=90)
        ax.set_ylabel('probability (at or below value)')
        ax.set_xlabel('Value')
        ax.set_title(f'CDF of a {distribution_name} Distribution')
        ax.grid()
        ax.legend(loc=0)
    if return_fig:
        return fig, ax
    return ax

def plotly_ecdf(data, cdf_color='green', figsize=(500, 500), stat_box_loc=(0.02, 0.98),  
                distribution_name='Normal', plot_p50_line=True):
    '''
    Given a distribution plot the cdf and stats for the cdf. Creates an interactive plot using plotly
    
    Parameters:
        data: np.array
            the distribution you want to plot up
        cdf_color: str
            any matplotlib recognized color
        ax:matplotlib.axes
            a matplotlib axes instance if you are creating that beforehand. Otherwise I'll create one
        figsize: tuple
            (width, height)
        stat_box_loc: tuple
            (x, y) coordinate in figure space not data space (0, 0) is bottom left of figure\
        distribution_name: str
            what name to put in title
        plot_p50_line: bool
            Can plot a dashed line hightlighting the median (50th percentile)
            
    Returns:
        fig: go.Figure()
    '''
    x, y = ecdf(data)
    median = np.median(normal_dist)
    standard_deviation = normal_dist.std()
    data_min = data.min()
    stat_strings = '<br>'.join([f'Median = {median:.1f}',
                                f'S.D. = {standard_deviation:.1f}']
                               )
    hover_template = '<i>Probability: %{y:.1%}' + '<br>value: %{x:.2f}'
    
    # Create figure
    fig = go.Figure(go.Scatter(x=x, y=y, line={'color':cdf_color},
                               hovertemplate=hover_template, name="CDF"))
    # median lines
    if plot_p50_line:
        fig.add_trace(go.Scatter(x=[median, median, data_min], y=[0, 0.5, 0.5], 
                                 line={'color':'brown', 'dash':'dash'},
                                 mode='lines', name='Median', hoverinfo='skip'))
        
    # add stats annotation
    fig.add_annotation(x=stat_box_loc[0], y=stat_box_loc[1], text=stat_strings,
                       bordercolor='black', bgcolor='tan', showarrow=False,
                       xref='paper', yref='paper',
                       align='left', font=dict(size=10))
    # set up fig layout
    fig.update_layout(title={'text':f'CDF of a {distribution_name} Distribution',
                             'x':0.5, 'xref':'paper', 'y':0.85}, 
                      width=figsize[0], height=figsize[1],
                      xaxis=dict(title='Value'),
                      yaxis=dict(title='Probability', tick0=0, rangemode='nonnegative'))
    return fig

On the CDF plot, the y-axis represents the cumulative probability, aka the percentile of your distribution. The x-axis is the values in your distribution (ordered from least to greatest). The line is using vertical distances to show the probabilities. Here's an example of the same normal distribution.

In [ ]:
fig, ax = plot_ecdf(normal_dist)
fig.savefig('images/normal_cdf.png', dpi=150, bbox_inches='tight')

The CDF also works quite well for categorical distributions. Just be mindful of how the categories are related. It works well with ordinal but not nominal (i.e. there is no meaning to the order of the categories. for example provinces). Here's an example of the same normal distribution converted into a categorical distribution by converting all values to integers:

In [ ]:
norm_int_dist = normal_dist.astype(int)
fig, ax = plot_ecdf(norm_int_dist)
fig.savefig('images/normal_int_cdf.png', dpi=150, bbox_inches='tight')

Now that we know what a CDF is, lets start looking at what sort of information one can glean from this graph. This time we'll plot up the CDF in an interactive plot. So go ahead and play around with it some! Again, if you aren't using plotly then continue on down and there are some static plots as well

In [ ]:
fig = plotly_ecdf(normal_dist)
fig.show()
fig.write_html('images/2020-04-normal_cdf.html')

What are the general pieces of information we can see? The range of values, Percentiles (i.e. P20 is where 20% of the values are less than or equal to that value), Your median (because the median is the P50), anything related to the percentiles (i.e. what values are between the P20 and the P80, or what values lie within +/- 20% of the median). So looking at the above cdf to find the percentile we find the decimal percentile value on the Y axis (P20 = 0.2) drew a line horizontally over to our cdf line and then go straight down, that's the percentile of our distribution. So let's look at some of those values for the above plot:

| P20 | 11.6 |

| Median (P50) | 14.4 |

| Values between P20 - P80 | 11.6 to 17.2 |

| Probability of values between 11.6 and 17.2 | 80% - 20% = 60% |

What about the slope of the line? The slope of the line along the CDF also gives us some basic information. This in affect tells us how spread out values are. A steeper slope means that the there is less spread and a shallower slope gives shows a greater relative spread in your data. So if we have a relatively shallow slope at the top or bottom that could be an indication of possible outliers. Also if there is a relatively shallow slope somewhere in the middle of the distribution, that could be a sign of a bimodal distribution.

In [ ]:
perc_color_list = ['#f768a1','#c51b8a','#7a0177']
perc_list = [0.2, 0.5, 0.8]
fig, ax = plot_ecdf(normal_dist, percentiles_to_annotate=perc_list, annotate_colors=perc_color_list)
fig.savefig('images/normal_cdf_annotated.png', dpi=150, bbox_inches='tight')

Plot for a bunch of Distributions

It can be interesting to compare how different distributions look in a Histograms and a CDF. This is one area where the Histogram is much better than the CDF. It can be hard to tell the difference distribution types on a cdf. However later we'll look at comparing multiple distributions to each other, in that case the CDF shines.

In [ ]:
dist = dict()
dist_size = 500

dist['Lognormal'] = {"data": np.random.lognormal(0, 1, size=dist_size), "bin":30}
dist['Chisquare'] = {"data": np.random.chisquare(2, size=dist_size), "bin":30}
dist['Beta'] = {"data": np.random.beta(1, 4, size=dist_size), "bin":30}
dist['Gamma'] = {"data": np.random.gamma(2, 2, size=dist_size), "bin":20}

dist['Triangular'] = {"data": np.random.triangular(-3, 0, 8, size=dist_size), "bin":'auto'}
dist['Normal'] = {"data": np.random.normal(size=dist_size), "bin":'auto'}

dist['Uniform'] = {"data": np.random.uniform(size=dist_size), "bin":'auto'}
dist['Binomial'] = {"data": np.random.binomial(10, 0.5, size=dist_size), "bin":'auto'}
dist['Poisson'] = {"data": np.random.poisson(size=dist_size), "bin":'auto'}
In [ ]:
for d_type in dist.keys():
    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 4))
    ax[0] = plot_histogram(dist[d_type]['data'], ax=ax[0], bins=dist[d_type]['bin'],
                           estimate_pdf=False, distribution_name=d_type, return_fig=False)
    ax[1] = plot_ecdf(dist[d_type]['data'], ax=ax[1], distribution_name=d_type, return_fig=False)
    fig.suptitle(f'{d_type} Distribution')
    fig.savefig(f'images/compare_hist_cdf_{d_type}.png', dpi=150, bbox_inches='tight')
In [ ]: